import pandas as pd
import pandas as pd
import numpy as np
import pdb

# Cost tables keyed by income decile and activity code, read once at import time.
mean_costs = pd.read_csv('/REDACTED/data/metadata/mean_cost_decile_ac.csv')
# NOTE(review): this reads the same mean_* file as `mean_costs` above, whereas the
# *_wins counterpart below reads a median_* file. Looks like a copy-paste slip;
# confirm whether 'median_cost_decile_ac.csv' was intended.
median_costs = pd.read_csv('/REDACTED/data/metadata/mean_cost_decile_ac.csv')
mean_costs_wins = pd.read_csv('/REDACTED/data/metadata/mean_cost_decile_ac_wins.csv')
median_costs_wins = pd.read_csv('/REDACTED/data/metadata/median_cost_decile_ac_wins.csv')

# Cost tables keyed by activity code only.
mean_costs_ac = pd.read_csv('/REDACTED/data/metadata/mean_cost_ac.csv')
# NOTE(review): loads the mean_* file again; confirm whether 'median_cost_ac.csv' was intended.
median_costs_ac = pd.read_csv('/REDACTED/data/metadata/mean_cost_ac.csv')
mean_costs_wins_ac = pd.read_csv('/REDACTED/data/metadata/mean_cost_ac_wins.csv')
median_costs_wins_ac = pd.read_csv('/REDACTED/data/metadata/median_cost_ac_wins.csv')

# "V2" variants of the decile/activity-code tables.
mean_costs_v2 = pd.read_csv('/REDACTED/data/metadata/mean_cost_decile_ac_V2.csv')
# NOTE(review): loads the mean_* V2 file again; confirm whether 'median_cost_decile_ac_V2.csv' was intended.
median_costs_v2 = pd.read_csv('/REDACTED/data/metadata/mean_cost_decile_ac_V2.csv')
mean_costs_wins_v2 = pd.read_csv('/REDACTED/data/metadata/mean_cost_decile_ac_wins_V2.csv')
median_costs_wins_v2 = pd.read_csv('/REDACTED/data/metadata/median_cost_decile_ac_wins_V2.csv')

# "V2" variants of the activity-code-only tables.
mean_costs_ac_v2 = pd.read_csv('/REDACTED/data/metadata/mean_cost_ac_V2.csv')
# NOTE(review): loads the mean_* V2 file again; confirm whether 'median_cost_ac_V2.csv' was intended.
median_costs_ac_v2 = pd.read_csv('/REDACTED/data/metadata/mean_cost_ac_V2.csv')
mean_costs_wins_ac_v2 = pd.read_csv('/REDACTED/data/metadata/mean_cost_ac_wins_V2.csv')
median_costs_wins_ac_v2 = pd.read_csv('/REDACTED/data/metadata/median_cost_ac_wins_V2.csv')



def _decile_cost_lookup(frame, value_col):
    """Build ``{income_bucket: {activity_code: value}}`` from a cost table.

    For every distinct ``income_bucket`` in *frame*, and every distinct
    ``activity_code`` occurring within that bucket, the stored value is taken
    from the first matching row of the ``value_col`` column.
    """
    lookup = {}
    for decile in frame.income_bucket.unique():
        in_decile = frame[frame.income_bucket == decile]
        per_activity = {}
        for ac in in_decile.activity_code.unique():
            matches = in_decile[in_decile.activity_code == ac]
            # Attribute-style column access; only the first matching row is used.
            per_activity[ac] = getattr(matches, value_col).iloc[0]
        lookup[decile] = per_activity
    return lookup


# The un-suffixed tables are read through their `cost` column, the *_wins
# tables through `cost_wins`.
mean_dict = _decile_cost_lookup(mean_costs, 'cost')
median_dict = _decile_cost_lookup(median_costs, 'cost')
mean_dict_w = _decile_cost_lookup(mean_costs_wins, 'cost_wins')
median_dict_w = _decile_cost_lookup(median_costs_wins, 'cost_wins')

mean_dict_v2 = _decile_cost_lookup(mean_costs_v2, 'cost')
median_dict_v2 = _decile_cost_lookup(median_costs_v2, 'cost')
mean_dict_w_v2 = _decile_cost_lookup(mean_costs_wins_v2, 'cost_wins')
median_dict_w_v2 = _decile_cost_lookup(median_costs_wins_v2, 'cost_wins')


def lookupCostMean(ivar, avar, v2=False):
    """Return the mean cost stored for an (income bucket, activity code) pair.

    Args:
        ivar: Income-bucket key (outer key of the lookup table).
        avar: Activity-code key (inner key of the lookup table).
        v2: When it compares equal to ``False`` (the default) ``mean_dict``
            is consulted; any other value selects ``mean_dict_v2``.

    Returns:
        The stored cost, or ``np.nan`` when either key is absent.
    """
    costs_by_bucket = mean_dict if v2 == False else mean_dict_v2
    try:
        return costs_by_bucket[ivar][avar]
    except KeyError:
        # Unknown income bucket, or activity code not present in that bucket.
        return np.nan

def lookupCostMedian(ivar, avar, v2=False):
    """Return the median cost stored for an (income bucket, activity code) pair.

    Args:
        ivar: Income-bucket key (outer key of the lookup table).
        avar: Activity-code key (inner key of the lookup table).
        v2: When it compares equal to ``False`` (the default) ``median_dict``
            is consulted; any other value selects ``median_dict_v2``.

    Returns:
        The stored cost, or ``np.nan`` when either key is absent.
    """
    costs_by_bucket = median_dict if v2 == False else median_dict_v2
    try:
        return costs_by_bucket[ivar][avar]
    except KeyError:
        # Unknown income bucket, or activity code not present in that bucket.
        return np.nan

def lookupCostMean_w(ivar, avar, v2=False):
    """Return the ``_w`` mean cost for an (income bucket, activity code) pair.

    Args:
        ivar: Income-bucket key (outer key of the lookup table).
        avar: Activity-code key (inner key of the lookup table).
        v2: When it compares equal to ``False`` (the default) ``mean_dict_w``
            is consulted; any other value selects ``mean_dict_w_v2``.

    Returns:
        The stored cost, or ``np.nan`` when either key is absent.
    """
    costs_by_bucket = mean_dict_w if v2 == False else mean_dict_w_v2
    try:
        return costs_by_bucket[ivar][avar]
    except KeyError:
        # Unknown income bucket, or activity code not present in that bucket.
        return np.nan

def lookupCostMedian_w(ivar, avar, v2=False):
    """Return the ``_w`` median cost for an (income bucket, activity code) pair.

    Args:
        ivar: Income-bucket key (outer key of the lookup table).
        avar: Activity-code key (inner key of the lookup table).
        v2: When it compares equal to ``False`` (the default)
            ``median_dict_w`` is consulted; any other value selects
            ``median_dict_w_v2``.

    Returns:
        The stored cost, or ``np.nan`` when either key is absent.
    """
    costs_by_bucket = median_dict_w if v2 == False else median_dict_w_v2
    try:
        return costs_by_bucket[ivar][avar]
    except KeyError:
        # Unknown income bucket, or activity code not present in that bucket.
        return np.nan
        

def _activity_cost_lookup(frame, value_col):
    """Return ``{activity_code: value}`` for the ``value_col`` column of *frame*."""
    by_activity = frame.set_index('activity_code')
    return by_activity.to_dict()[value_col]


# Activity-code-only lookups. The prints below run at import time and dump
# the raw table, its dict form, and the resulting mean lookup.
print(mean_costs_ac)
print(mean_costs_ac.set_index('activity_code').to_dict())
mean_dict_ac = _activity_cost_lookup(mean_costs_ac, 'cost')
median_dict_ac = _activity_cost_lookup(median_costs_ac, 'cost')
mean_dict_w_ac = _activity_cost_lookup(mean_costs_wins_ac, 'cost_wins')
median_dict_w_ac = _activity_cost_lookup(median_costs_wins_ac, 'cost_wins')
print(mean_dict_ac)

# Same again for the V2 tables.
print(mean_costs_ac_v2)
print(mean_costs_ac_v2.set_index('activity_code').to_dict())
mean_dict_ac_v2 = _activity_cost_lookup(mean_costs_ac_v2, 'cost')
median_dict_ac_v2 = _activity_cost_lookup(median_costs_ac_v2, 'cost')
mean_dict_w_ac_v2 = _activity_cost_lookup(mean_costs_wins_ac_v2, 'cost_wins')
median_dict_w_ac_v2 = _activity_cost_lookup(median_costs_wins_ac_v2, 'cost_wins')
print(mean_dict_ac_v2)

def lookupCostMeanAC(avar, v2=False):
    """Return the mean cost stored for an activity code.

    Args:
        avar: Activity-code key.
        v2: When it compares equal to ``False`` (the default)
            ``mean_dict_ac`` is consulted; any other value selects
            ``mean_dict_ac_v2``.

    Returns:
        The stored cost, or ``np.nan`` when the activity code is absent.
    """
    costs_by_activity = mean_dict_ac if v2 == False else mean_dict_ac_v2
    try:
        return costs_by_activity[avar]
    except KeyError:
        return np.nan

def lookupCostMedianAC(avar, v2=False):
    """Return the median cost stored for an activity code.

    Args:
        avar: Activity-code key.
        v2: When it compares equal to ``False`` (the default)
            ``median_dict_ac`` is consulted; any other value selects
            ``median_dict_ac_v2``.

    Returns:
        The stored cost, or ``np.nan`` when the activity code is absent.
    """
    costs_by_activity = median_dict_ac if v2 == False else median_dict_ac_v2
    # Look up the key itself. The previous check only tested that the table
    # was non-empty, so an unknown activity code raised KeyError instead of
    # yielding NaN like the other lookup functions do.
    return costs_by_activity.get(avar, np.nan)

def lookupCostMeanAC_w(avar, v2=False):
    """Return the ``_w`` mean cost stored for an activity code.

    Args:
        avar: Activity-code key.
        v2: When it compares equal to ``False`` (the default)
            ``mean_dict_w_ac`` is consulted; any other value selects
            ``mean_dict_w_ac_v2``.

    Returns:
        The stored cost, or ``np.nan`` when the activity code is absent.
    """
    costs_by_activity = mean_dict_w_ac if v2 == False else mean_dict_w_ac_v2
    try:
        return costs_by_activity[avar]
    except KeyError:
        return np.nan

def lookupCostMedianAC_w(avar, v2=False):
    """Return the ``_w`` median cost stored for an activity code.

    Args:
        avar: Activity-code key.
        v2: When it compares equal to ``False`` (the default)
            ``median_dict_w_ac`` is consulted; any other value selects
            ``median_dict_w_ac_v2``.

    Returns:
        The stored cost, or ``np.nan`` when the activity code is absent.
    """
    # The V2 table used to be ignored: with v2=True the function fell off
    # the end and returned None. Both tables are now handled the same way.
    costs_by_activity = median_dict_w_ac if v2 == False else median_dict_w_ac_v2
    if avar in costs_by_activity:
        return costs_by_activity[avar]
    return np.nan


def getCostsACOnly(data, acvarb='activity_code', median=False, wins=True, v2=False):
    """Return a copy of *data* with an ``exp_cost`` column looked up by activity code.

    Args:
        data: DataFrame holding the activity-code column; it is not modified.
        acvarb: Name of the column containing the activity code.
        median: Truthy selects the median lookups (``lookupCostMedianAC*``),
            otherwise the mean lookups (``lookupCostMeanAC*``).
        wins: Truthy selects the ``_w`` variant of the chosen lookup.
        v2: When it compares equal to ``False`` the lookup is called with its
            default table; any other value calls it with ``v2=True``.

    Returns:
        The copied DataFrame with ``exp_cost`` filled row by row.
    """
    result = data.copy()

    # Resolve the lookup function once instead of repeating the branch tree.
    if median:
        lookup = lookupCostMedianAC_w if wins else lookupCostMedianAC
    else:
        lookup = lookupCostMeanAC_w if wins else lookupCostMeanAC

    if v2 == False:
        result['exp_cost'] = result.apply(lambda row: lookup(row[acvarb]), axis=1)
    else:
        result['exp_cost'] = result.apply(lambda row: lookup(row[acvarb], v2=True), axis=1)
    return result


def getCosts(data, incomevarb='income_bucket_nrs', acvarb='activity_code', median=False, wins=True, v2=False):
    """Return a copy of *data* with an ``exp_cost`` column looked up by income bucket and activity code.

    Args:
        data: DataFrame holding both key columns; it is not modified.
        incomevarb: Name of the column containing the income bucket.
        acvarb: Name of the column containing the activity code.
        median: Truthy selects the median lookups (``lookupCostMedian*``),
            otherwise the mean lookups (``lookupCostMean*``).
        wins: Truthy selects the ``_w`` variant of the chosen lookup.
        v2: When it compares equal to ``False`` the lookup is called with its
            default table; any other value calls it with ``v2=True``.

    Returns:
        The copied DataFrame with ``exp_cost`` filled row by row.
    """
    result = data.copy()

    # Resolve the lookup function once instead of repeating the branch tree.
    if median:
        lookup = lookupCostMedian_w if wins else lookupCostMedian
    else:
        lookup = lookupCostMean_w if wins else lookupCostMean

    if v2 == False:
        result['exp_cost'] = result.apply(
            lambda row: lookup(row[incomevarb], row[acvarb]), axis=1)
    else:
        result['exp_cost'] = result.apply(
            lambda row: lookup(row[incomevarb], row[acvarb], v2=True), axis=1)
    return result

if __name__ == '__main__':
    # Smoke run: annotate a tiny frame with the default tables first, then
    # with the V2 tables, printing each result.
    testdat = pd.DataFrame({'income_bucket':[1,1,2,5],'activity_code':[270,272,271,275]})
    for use_v2 in (False, True):
        print(getCosts(testdat, incomevarb='income_bucket', v2=use_v2))
        print(getCostsACOnly(testdat, v2=use_v2))
    
